********************************************************************************
* SAMPLE SELECTION
********************************************************************************
* Build weight_lfs: LFS employment totals (totempl) by country, wave, age class
* and gender, keeping one LFS calendar year per survey wave.
insheet using "$input_ext/lfs.csv", delimiter("$DELIMITER") clear
reshape long totempl, i(country cl_age dm_gender) j(year)

* Map LFS years to waves; years that correspond to no wave stay missing.
generate wave = .
* Wave 1: France (17) and Belgium (23) were interviewed in different years and
* a single set of weights is needed. The year with more observations is used:
* 2004 for France (as for every other country) and 2005 for Belgium.
replace wave = 1 if (year==2004 & country!=23) | (year==2005 & country==23)
* Wave 2: all countries were interviewed over two years. The prevalent year is
* 2007 for all countries but France (17), for which 2006 is used.
replace wave = 2 if (year==2007 & country!=17) | (year==2006 & country==17)
replace wave = 4 if year==2011
replace wave = 5 if year==2013
replace wave = 6 if year==2015
drop if wave==.

* Tabulate the number of rows in each country x age class x gender x wave cell
generate x = 1
egen tot = total(x), by(country cl_age dm_gender wave)
tabulate tot

keep country wave cl_age dm_gender totempl
sort country wave cl_age dm_gender
save weight_lfs, replace


* Load the full dataset, inspect missing-value patterns, and restrict the
* sample to the ten countries used in the analysis.
use complete, clear

* Missing values in the baseline variables
foreach v in dm_age_years dm_education {
	tabulate `v', missing
}
tabulate dm_education, missing nolabel
foreach v in dm_bornlocal dm_citizen {
	tabulate `v', missing
}
tabulate dm_marstatus, missing nolabel
tabulate ex_sunny, missing
tabulate ex_sunny wave, missing
tabulate ex_sunny wave, missing nofreq column

foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate `q', missing
}

* Country-by-wave counts of valid answers (not ., .a or .b) to the two
* variables of interest, followed by the unrestricted counts
foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate country wave if `q'!=. & `q'!=.a & `q'!=.b
}
tabulate country wave

* Country selection
* Austria 11, Germany 12, Sweden 13, Netherlands 14, Spain 15, Italy 16,
* France 17, Denmark 18, Switzerland 20, Belgium 23
keep if inlist(country, 11, 12, 13, 14, 15, 16, 17, 18, 20, 23)
foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate country wave if `q'!=. & `q'!=.a & `q'!=.b
}

* In waves 1 and 2 the two questions on expectations are asked not only to
* those who are currently employed, but also to those who report to be
* entitled to a public old age pension
foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate country wave if `q'!=. & `q'!=.a & `q'!=.b  ///
		& (cl_pensiond1==1 | cl_pensiond2==1 | cl_pensiond3==1) & em_employment!=2
}

* In the fourth wave longitudinal respondents are not asked the question.
* This generates almost all missing "." with few exceptions that are routine errors
foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate `q' wave, missing
	tabulate `q' wave, missing nofreq column
}

* The proportion of missing for "don't know" or "refusal" is around 5%
foreach q in ex_gov_red_pen ex_gov_age_incr {
	tabulate `q' wave, missing nofreq column
}

* Final sample
tabulate country wave

* Build the analysis variables on the selected sample and save it as
* complete_selected.

* Five-year age classes for ages 50-64; any other age leaves cl_age missing.
g cl_age=1 if dm_age_years>=50 & dm_age_years<=54
replace cl_age=2 if dm_age_years>=55 & dm_age_years<=59
replace cl_age=3 if dm_age_years>=60 & dm_age_years<65
label define cl_age 1 "50-54" 2 "55-59" 3 "60-64"
* Attach the label to the variable (it was defined but never applied), as is
* done for every other label defined in this section.
label values cl_age cl_age

* I clean some of the vars
* The original education coding is kept in orig_dm_education.
g orig_dm_education=dm_education

* Codes 95 and 97 are first sent to 0 and then, together with the original 0,
* to category 1; codes 5 and 6 are merged into category 4.
replace dm_education=0 if dm_education==95 | dm_education==97
replace dm_education=4 if dm_education==5 | dm_education==6
replace dm_education=1 if dm_education==0
label define _dm_education 1 "Primary education" 2 "Lower secondary education" 3 "(Upper) secondary education" 4 "Tertiary or post-secondary"
label value dm_education _dm_education

* Marital status collapsed to four categories; the missing codes ., .a and .b
* of dm_marstatus are carried over unchanged.
g dm_marstatus2=1 if dm_marstatus==1 | dm_marstatus==2
replace dm_marstatus2=2 if dm_marstatus==3 | dm_marstatus==5
replace dm_marstatus2=3 if dm_marstatus==4
replace dm_marstatus2=4 if dm_marstatus==6
replace dm_marstatus2=dm_marstatus if dm_marstatus==. | dm_marstatus==.a | dm_marstatus==.b
label define dm_marstatus2 1 "Married/partnership" 2 "Divorced/separated" 3 "Never married" 4 "Widowed"
label value dm_marstatus2 dm_marstatus2

* Self-reported health: category 5 is merged into category 4 ("Fair/poor").
replace dm_ghealth=4 if dm_ghealth==5
label define _dm_ghealth 1 "Excellent" 2 "Very good" 3 "Good" 4 "Fair/poor"
label value dm_ghealth _dm_ghealth

* Household size top-coded at 4; missing values are left untouched.
g dm_hsize2=dm_hsize
replace dm_hsize2=4 if dm_hsize2>=4 & dm_hsize2!=.
tab dm_hsize2


* Divide the monetary variables (and their _imp companions) and hnetw by 1000.
foreach var in bacc bacc_imp bsmf bsmf_imp ydip ydip_imp yself yself_imp rhre rhre_imp fahc fahc_imp hy hy_imp hnetw {
	replace `var'=`var'/1000 
	}

* NOTE(review): fahc has just been divided by 1000 while fohc has not, so
* cfood may be adding two different units - confirm the units of fohc.
g cfood=fahc+fohc
sum hy hrass cfood rhre
* Logs of the rescaled hy and hnetw; values below 1 (whose log would be
* negative or undefined) are set to 0. Missing values stay missing.
g log_hy=log(hy)
replace log_hy=0 if hy<1 
g log_hnetw=log(hnetw)
replace log_hnetw=0 if hnetw<1

* Part-time dummy: below 30 in em_mainjob_hh; missing when em_mainjob_hh is
* ., .a or .b; forced to 0 whenever em_employment is not 2.
g parttime=(em_mainjob_hh<30)
replace parttime=. if em_mainjob_hh==. | em_mainjob_hh==.a | em_mainjob_hh==.b
replace parttime=0 if em_employment!=2

* I put both variables in a [0,1] scale
* The unscaled versions of the two main variables are kept with prefix orig_.
g orig_ex_gov_age_incr=ex_gov_age_incr
g orig_ex_gov_red_pen=ex_gov_red_pen

* Each variable is replaced by a double-precision copy divided by 100.
foreach var in ex_gov_age_incr ex_gov_red_pen ex_gov_age_63 ex_sunny {
	g double `var'2=`var'/100
	drop `var'
	ren `var'2 `var'
	}

* Indices of uncertainty: p*(1-p), largest at p=0.5
g ind_gov_age_incr=ex_gov_age_incr*(1-ex_gov_age_incr)
g ind_gov_red_pen=ex_gov_red_pen*(1-ex_gov_red_pen)

save complete_selected, replace

*************************************************************************
* ADD NRA AND REPLACEMENT RATE FROM OECD DOCUMENTS
*************************************************************************
* Reshape the PAG workbook to one row per country x gender x reference year
* and merge NRA and rate_rpl onto the selected sample.
import excel "$input_ext/PAG.xlsx", sheet("Sheet2") firstrow clear
reshape long NRA rate_rpl, j(year) i(country_name country dm_gender)

* String merge key of the form "PAGreferenceyearYYYY"
generate yearPAG = "PAGreferenceyear" + string(year)
tabulate yearPAG
drop year
save PAG, replace

use complete_selected, clear
tabulate int_year

* Assign each interview year its PAG reference year; interview years not
* listed below get an empty key and therefore find no match in PAG.
generate yearPAG = ""
replace yearPAG = "PAGreferenceyear2002" if int_year==2004
replace yearPAG = "PAGreferenceyear2004" if inlist(int_year, 2005, 2006)
replace yearPAG = "PAGreferenceyear2006" if int_year==2007
replace yearPAG = "PAGreferenceyear2008" if inlist(int_year, 2011, 2012)
replace yearPAG = "PAGreferenceyear2012" if inlist(int_year, 2013, 2015)

merge m:1 country dm_gender yearPAG using PAG
* Show, then discard, the PAG rows that match no respondent
tabulate country yearPAG if _merge==2
drop if _merge==2
drop _merge

* Cross-check the country codes against the PAG country names
tabulate country country_name
drop country_name

summarize NRA
count
summarize rate_rpl

* Replacement rate from percent to a fraction
replace rate_rpl = rate_rpl/100

save complete_selected, replace

************************************************************
* VARIABLES FOR PENSION ENTITLEMENT
************************************************************
* Theoretical paper: I generate a single variable which likely captures the most relevant margin
* Build entitledP, replacementP and expageP from pension type 1, extended with
* pension type 4 in the five countries listed below.
use complete_selected, clear

* Expected claiming ages reported by respondents not flagged as entitled
forvalues k = 1/5 {
	summarize cl_pension`k'_expage if cl_pensiond`k'!=1
}
* Blank those ages for pension types 1 and 5
foreach k in 1 5 {
	replace cl_pension`k'_expage = . if cl_pensiond`k'!=1
}

* Entitlement dummies: 0/1 when the source is 0/1, missing otherwise
generate entitled = cl_pension==1 if cl_pension==0 | cl_pension==1
table country wave, c(mean entitled)

generate entitledP = cl_pensiond1==1 if cl_pensiond1==0 | cl_pensiond1==1
table country wave, c(mean entitledP)

generate double replacementP = cl_pension1_replacement
generate double expageP = cl_pension1_expage

* Share of each pension type among observations with em_employment==2
forvalues k = 1/5 {
	table country wave if em_employment==2, c(mean cl_pensiond`k')
}

tabulate cl_pension1_compulsory wave if cl_pensiond1==1, missing nofreq column
bysort country: tabulate cl_pension4_compulsory wave if cl_pensiond4==1, missing nofreq column
tabulate cl_pension4_compulsory wave if cl_pensiond4==1 & country!=14, missing nofreq column

* In Denmark, Netherlands, Switzerland, France, Sweden there is a mandatory or
* widespread occupational retirement scheme
foreach c in 14 17 18 20 13 {
	summarize cl_pensiond* if country==`c'
}

* Conditions reused below:
*   occ   - the five countries above
*   both  - entitled to pension types 1 and 4, in one of those countries
*   only4 - entitled to type 4 but not to type 1, in one of those countries
local occ   "inlist(country, 13, 14, 17, 18, 20)"
local both  "cl_pensiond4==1 & cl_pensiond1==1 & `occ'"
local only4 "cl_pensiond4==1 & cl_pensiond1==0 & `occ'"

tabulate cl_pensiond4 wave if `occ', missing nofreq column
tabulate cl_pension4_compulsory wave if cl_pensiond4==1 & `occ', missing nofreq column
tabulate cl_pension4_compulsory wave if cl_pensiond4==1 & !`occ', missing

* In those countries entitledP takes cl_pensiond4 whenever it is not 0
* (a missing cl_pensiond4 therefore makes entitledP missing)
replace entitledP = cl_pensiond4 if cl_pensiond4!=0 & `occ'

* Replacement rate: sum of types 1 and 4 when both are held. The result is .a
* if either component is .a, and then . if either component is . or .b (this
* second rule is applied last, so it takes precedence). With type 4 only, the
* type 4 value is used.
replace replacementP = replacementP + cl_pension4_replacement if `both'
replace replacementP = .a if (cl_pension1_replacement==.a | cl_pension4_replacement==.a) & `both'
replace replacementP = . if (cl_pension1_replacement==. | cl_pension1_replacement==.b ///
	| cl_pension4_replacement==. | cl_pension4_replacement==.b) & `both'
replace replacementP = cl_pension4_replacement if `only4'

* Expected claiming age: the smaller of types 1 and 4 when both are held, with
* the same missing-value rules as for the replacement rate
replace expageP = min(expageP, cl_pension4_expage) if `both'
replace expageP = .a if (cl_pension1_expage==.a | cl_pension4_expage==.a) & `both'
replace expageP = . if (cl_pension1_expage==. | cl_pension1_expage==.b ///
	| cl_pension4_expage==. | cl_pension4_expage==.b) & `both'
replace expageP = cl_pension4_expage if `only4'

* Attention: in the Netherlands, wave 1, there was no question about the public pension
foreach v in replacementP expageP entitledP {
	replace `v' = . if country==14 & wave==1
}

tabulate cl_replacement, missing
tabulate cl_expage, missing

tabulate replacementP entitledP, missing
tabulate expageP entitledP, missing

tabulate replacementP entitledP, missing nofreq column
tabulate expageP entitledP, missing nofreq column

tabulate ex_gov_age_63 wave, missing

tabulate replacementP entitledP, missing nofreq column
tabulate expageP entitledP, missing nofreq column

tabulate country wave if entitledP==1
* Complement of entitledP; missing only where entitledP is "."
generate noentitledP = entitledP==0 if entitledP!=.

tabulate replacementP if entitledP==1, missing
* Dummy for a replacement rate coded .a; missing where the rate is . or .b
generate double UNKreplace = replacementP==.a if replacementP!=. & replacementP!=.b

* NOTE(review): the bare "if cl_pension1_replacement" is true for every nonzero
* value, missing values included, so it only excludes observations equal to 0 -
* confirm the intended restriction.
table country wave if cl_pension1_replacement, c(mean UNKreplace)

* I put the replacement rate on a [0-1] scale
replace replacementP = replacementP/100

save complete_selected, replace


*********************************
* Empirical paper: I work out the subtleties
* Build entitled_public, expage_public, yrcontr_public, entitled_private and
* expage_private, with country- and wave-specific pension categories.
use complete_selected, clear

* PUBLIC PENSION
* Some countries do not have category 2
* In these cases, cl_pensiond2 is already a 0 (unless they have
* the introductory question for all pension claims, ep097, missing, 
* so they are correctly missing)
* Outside Sweden (13): entitled if category 1 or 2 is held; missing if either
* dummy is not 0/1.
g entitled_public=(cl_pensiond1==1 | cl_pensiond2==1) if country!=13
replace entitled_public=. if ((cl_pensiond1!=0 & cl_pensiond1!=1) | (cl_pensiond2!=0 & cl_pensiond2!=1)) & country!=13
* Sweden, wave 1, has an occupational pension in category 2
* so for Sweden only category 1 counts as public.
replace entitled_public=cl_pensiond1 if country==13
replace entitled_public=. if cl_pensiond1!=0 & cl_pensiond1!=1 & country==13
table country wave if em_employment==2, c(mean entitled_public)

* Expected claiming age: smallest across the public categories. rowmin()
* ignores missing components, so the result is blanked explicitly whenever a
* category that is held has a missing age.
egen expage_public=rowmin(cl_pension1_expage cl_pension2_expage) if entitled_public==1 & country!=13
replace expage_public=. if (cl_pension1_expage==. | cl_pension1_expage==.a | cl_pension1_expage==.b) & cl_pensiond1==1 & country!=13 
replace expage_public=. if (cl_pension2_expage==. | cl_pension2_expage==.a | cl_pension2_expage==.b) & cl_pensiond2==1 & country!=13 

replace expage_public=cl_pension1_expage if country==13
replace expage_public=. if (cl_pension1_expage==. | cl_pension1_expage==.a | cl_pension1_expage==.b) & cl_pensiond1==1 & country==13

* Years of contribution: largest across the public categories, with the same
* missing-value treatment as above.
egen yrcontr_public=rowmax(cl_pension1_yrcontr cl_pension2_yrcontr) if entitled_public==1 & country!=13
replace yrcontr_public=. if (cl_pension1_yrcontr==. | cl_pension1_yrcontr==.a | cl_pension1_yrcontr==.b) & cl_pensiond1==1 & country!=13
replace yrcontr_public=. if (cl_pension2_yrcontr==. | cl_pension2_yrcontr==.a | cl_pension2_yrcontr==.b) & cl_pensiond2==1 & country!=13

replace yrcontr_public=cl_pension1_yrcontr if country==13
replace yrcontr_public=. if (cl_pension1_yrcontr==. | cl_pension1_yrcontr==.a | cl_pension1_yrcontr==.b) & cl_pensiond1==1 & country==13

* PRIVATE PENSION
* Some countries do not have categories 4-5
* In these cases, cl_pensiond4-5 are already a 0 (unless they have
* the introductory question for all pension claims, ep097, missing, 
* so they are correctly missing)
tab country wave if cl_pensiond6==1
tab country wave if cl_pensiond7==1

* Standard definition (categories 4 and 5): all countries except Sweden (13)
* and Denmark (18), plus Sweden and Denmark from wave 2 onwards.
g entitled_private=(cl_pensiond4==1 | cl_pensiond5==1) if (country!=13 & country!=18) | (country==13 & wave>=2) | (country==18 & wave>=2)
replace entitled_private=. if ((cl_pensiond4!=0 & cl_pensiond4!=1) | (cl_pensiond5!=0 & cl_pensiond5!=1)) ///
	& ((country!=13 & country!=18) | (country==13 & wave>=2) | (country==18 & wave>=2))

* Sweden, wave 1: categories 2, 4, 5, 6 and 7
replace entitled_private=(cl_pensiond2==1 | cl_pensiond4==1 | cl_pensiond5==1 | cl_pensiond6==1 | cl_pensiond7==1) if country==13 & wave==1
replace entitled_private=. if ((cl_pensiond2!=0 & cl_pensiond2!=1) | (cl_pensiond4!=0 & cl_pensiond4!=1) | (cl_pensiond5!=0 & cl_pensiond5!=1) ///
	| (cl_pensiond6!=0 & cl_pensiond6!=1) | (cl_pensiond7!=0 & cl_pensiond7!=1)) & country==13 & wave==1

* Switzerland (20), wave 1: categories 4, 5 and 6.
* The assignment previously targeted wave==2 while its missing-value cleanup
* and all the matching expage_private code below target wave==1; it now uses
* wave==1 as well.
replace entitled_private=(cl_pensiond4==1 | cl_pensiond5==1 | cl_pensiond6==1) if country==20 & wave==1
replace entitled_private=. if ((cl_pensiond4!=0 & cl_pensiond4!=1) | (cl_pensiond5!=0 & cl_pensiond5!=1) ///
	| (cl_pensiond6!=0 & cl_pensiond6!=1)) & country==20 & wave==1

* Expected claiming age, standard definition.
* NOTE(review): Denmark (18) enters here from wave 4 only, whereas every other
* condition in this section uses wave>=2 for Denmark - confirm this is intended.
egen expage_private=rowmin(cl_pension4_expage cl_pension5_expage) if entitled_private==1 & ///
			((country!=13 & country!=18) | (country==13 & wave>=2) | (country==18 & wave>=4))
replace expage_private=. if (cl_pension4_expage==. | cl_pension4_expage==.a | cl_pension4_expage==.b)  ///
			 & cl_pensiond4==1 & ((country!=13 & country!=18) | (country==13 & wave>=2) | (country==18 & wave>=2))
replace expage_private=. if (cl_pension5_expage==. | cl_pension5_expage==.a | cl_pension5_expage==.b)  ///
			 & cl_pensiond5==1 & ((country!=13 & country!=18) | (country==13 & wave>=2) | (country==18 & wave>=2))

* Sweden, wave 1: smallest age across categories 2, 4, 5, 6 and 7, blanked
* when a category that is held has a missing age
egen _expage_private=rowmin(cl_pension2_expage cl_pension4_expage cl_pension5_expage cl_pension6_expage cl_pension7_expage) if country==13 & wave==1
replace expage_private=_expage_private if country==13 & wave==1
drop _expage_private
foreach i in 2 4 5 6 7 {
	replace expage_private=. if (cl_pension`i'_expage==. | cl_pension`i'_expage==.a | cl_pension`i'_expage==.b)  ///
			 & cl_pensiond`i'==1 & country==13 & wave==1
}

* Switzerland, wave 1: smallest age across categories 4, 5 and 6, same rule
egen _expage_private=rowmin(cl_pension4_expage cl_pension5_expage cl_pension6_expage) if country==20 & wave==1
replace expage_private=_expage_private if country==20 & wave==1
drop _expage_private
foreach i in 4 5 6 {
	replace expage_private=. if (cl_pension`i'_expage==. | cl_pension`i'_expage==.a | cl_pension`i'_expage==.b)  ///
			 & cl_pensiond`i'==1 & country==20 & wave==1
}

save complete_selected, replace

**********************************************************
* PANEL VARS
**********************************************************
* Declare the panel structure and record each respondent's household at the
* first wave in which the respondent is observed.
use complete_selected, clear

* Numeric person identifier built from mergeid; panel unit = pid, time = wave
egen pid = group(mergeid)
xtset pid wave

* Numeric household identifier built from hhid_current
egen hhid_num = group(hhid_current)

* First observed wave per respondent, and the household id in that wave
* copied to all of the respondent's rows
egen minwave = min(wave), by(mergeid)
generate _init_hhid = cond(wave==minwave, hhid_num, .)
egen init_hhid = min(_init_hhid), by(mergeid)

save complete_selected, replace


